##Seurat, dpylr, reshape2, ggplot2, patchwork, cowplot needed
#install.packages("Seurat")
library(Seurat)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
#library(SeuratData)
## Data Integration - Standard Workflow
DefaultAssay(p01.counts) <- "RNA"
DefaultAssay(p02.counts) <- "RNA"
DefaultAssay(p10.counts) <- "RNA"
DefaultAssay(norm.brca.integrated.epithelial) <- "RNA"
#SplitObject command will generate subsets of a Seurat object in list form based on the given metadata column. For this, we subset on the "Compartment" column, to generate two new objects, the epithelial compartment and the stromal compartment
norm.brca.subset.objects<-SplitObject(norm.brca.integrated.epithelial,"Compartment")
#We now seperate out each component of the list into seperate Seurat objects. The "$" command specifies which part of the full list we want to access.
norm.brca.epithelial.object<-norm.brca.subset.objects$EPITHELIAL
norm.brca.stromal.object<-norm.brca.subset.objects$STROMAL
#SplitObject command will generate subsets of a Seurat object in list form based on the given metadata column. For this, we subset on the "individual" column, to generate 6 new objects, for each patient.
norm.brca.epithelial.individual.objects<-SplitObject(norm.brca.epithelial.object,"individual")
#We now seperate out each component of the list into seperate Seurat objects. The "$" command specifies which part of the full list we want to access.
norm.brca.epithelial.individual.1.objects<-norm.brca.epithelial.individual.objects$ind1
norm.brca.epithelial.individual.2.objects<-norm.brca.epithelial.individual.objects$ind2
norm.brca.epithelial.individual.3.objects<-norm.brca.epithelial.individual.objects$ind3
norm.brca.epithelial.individual.4.objects<-norm.brca.epithelial.individual.objects$ind4
norm.brca.epithelial.individual.9.objects<-norm.brca.epithelial.individual.objects$ind9
norm.brca.epithelial.individual.10.objects<-norm.brca.epithelial.individual.objects$ind10
#To construct a reference, we will identify ‘anchors’ between the individual datasets. First, we combine each Seurat object into a list, with each dataset as an element.
#The list() function groups elements together in the form of list("X1"=Y1,"X2",Y2,...), where Xn is the name you want the list element to be called and Yn is the component you want added to the list. For this analysis, we want to keep track of which Seurat object belongs to which model/patient.
#first part
standard.workflow.object.list <-
#list("hci001"=p01.cc.updated,"hci002"=p02.updated,"hci010"=p10.updated)
list("hci001"=p01.counts,"hci002"=p02.counts,"hci010"=p10.counts,"n_patient1"=norm.brca.epithelial.individual.1.objects, "b_patient2"=norm.brca.epithelial.individual.2.objects, "b_patient3"=norm.brca.epithelial.individual.3.objects, "b_patient4" = norm.brca.epithelial.individual.4.objects, "n_patient9" = norm.brca.epithelial.individual.9.objects, "n_patient10"=norm.brca.epithelial.individual.10.objects )
#Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat v3 implements an improved method for variable feature selection based on a variance stabilizing transformation ("vst")
for (i in 1:length(standard.workflow.object.list)) {
standard.workflow.object.list[[i]] <- NormalizeData(standard.workflow.object.list[[i]], verbose = TRUE)
standard.workflow.object.list[[i]] <- FindVariableFeatures(standard.workflow.object.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = TRUE)
}
#Next we find anchors, which are pairwise correspondants between individual cells which originate from the same biological state. These anchors are then used to transfer infromation from one dataset to another
reference.list <- standard.workflow.object.list
integration.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
#After
#if hitting vector limit issue, open terminal, set cd to home directory, use "open .Renviron" and set the R_MAX_VSIZE=20Gb and save the file, you need to restart R for these changes to be saved
integrated.data <- IntegrateData(anchorset = integration.anchors, dims = 1:30)
#save(integrated.data, file = "/Users/paigehalas/Desktop/integrated.data.Rda")
load("/Users/paigehalas/Desktop/integrated.data.Rda")
#load necessary packages for visualization
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
#DefaultAssay(integrated.data) <- "integrated"
scale.integrated.data <- ScaleData(integrated.data, verbose = TRUE)
PCA.integrated.data<- RunPCA(scale.integrated.data, npcs = 30, verbose = TRUE)
UMAP.integrated.data <- RunUMAP(PCA.integrated.data, reduction = "pca", dims = 1:30)
#saveRDS(UMAP.integrated.data, file = "/Users/paigehalas/Desktop/UMAP.integrated.data.rds")
UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/UMAP.integrated.data.rds")
DimPlot(UMAP.integrated.data, reduction = "umap", label = TRUE)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
FeaturePlot(UMAP.integrated.data, features = "nFeature_RNA")
#integrated.data <- load("Users/paigehalas/Desktop/integrated.data.rda")
DefaultAssay(UMAP.integrated.data) <- "RNA"
UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(UMAP.integrated.data, pattern = "^MT-")
FeaturePlot(UMAP.integrated.data, features = "percent.mt")
DefaultAssay(integrated.data) <- "integrated"
r.scale.integrated.data <- ScaleData(integrated.data,vars.to.regress=c("nFeature_RNA", "percent.mt"))
r.PCA.integrated.data<- RunPCA(r.scale.integrated.data, npcs = 30, verbose = TRUE)
r.UMAP.integrated.data <- RunUMAP(r.PCA.integrated.data, reduction = "pca", dims = 1:30)
saveRDS(r.UMAP.integrated.data, file = "/Users/paigehalas/Desktop/r.UMAP.integrated.data.rds")
r.UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/r.UMAP.integrated.data.rds")
DimPlot(r.UMAP.integrated.data, reduction = "umap", label = TRUE)
DefaultAssay(r.UMAP.integrated.data) <- "RNA"
r.UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(r.UMAP.integrated.data, pattern = "^MT-")
FeaturePlot(r.UMAP.integrated.data, features = "nFeature_RNA")
FeaturePlot(r.UMAP.integrated.data, features = "percent.mt")
ds.norm.brca.epithelial.individual.1.objects <- subset(norm.brca.epithelial.individual.1.objects, downsample = 373)
ds.norm.brca.epithelial.individual.2.objects <- subset(norm.brca.epithelial.individual.2.objects, downsample = 373)
ds.norm.brca.epithelial.individual.3.objects <- subset(norm.brca.epithelial.individual.3.objects, downsample = 373)
ds.norm.brca.epithelial.individual.4.objects <- subset(norm.brca.epithelial.individual.4.objects, downsample = 373)
ds.norm.brca.epithelial.individual.9.objects <- subset(norm.brca.epithelial.individual.9.objects, downsample = 373)
ds.norm.brca.epithelial.individual.10.objects <- subset(norm.brca.epithelial.individual.10.objects, downsample = 373)
#To construct a reference, we will identify ‘anchors’ between the individual datasets. First, we combine each Seurat object into a list, with each dataset as an element.
#The list() function groups elements together in the form of list("X1"=Y1,"X2",Y2,...), where Xn is the name you want the list element to be called and Yn is the component you want added to the list. For this analysis, we want to keep track of which Seurat object belongs to which model/patient.
#first part
standard.workflow.object.list <-
#list("hci001"=p01.cc.updated,"hci002"=p02.updated,"hci010"=p10.updated)
list("hci001"=p01.counts,"hci002"=p02.counts,"hci010"=p10.counts,"n_patient1"=ds.norm.brca.epithelial.individual.1.objects, "b_patient2"=ds.norm.brca.epithelial.individual.2.objects, "b_patient3"=ds.norm.brca.epithelial.individual.3.objects, "b_patient4" = ds.norm.brca.epithelial.individual.4.objects, "n_patient9" = ds.norm.brca.epithelial.individual.9.objects, "n_patient10"=ds.norm.brca.epithelial.individual.10.objects )
#Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat v3 implements an improved method for variable feature selection based on a variance stabilizing transformation ("vst")
for (i in 1:length(standard.workflow.object.list)) {
standard.workflow.object.list[[i]] <- NormalizeData(standard.workflow.object.list[[i]], verbose = TRUE)
standard.workflow.object.list[[i]] <- FindVariableFeatures(standard.workflow.object.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = TRUE)
}
#Next we find anchors, which are pairwise correspondants between individual cells which originate from the same biological state. These anchors are then used to transfer infromation from one dataset to another
reference.list <- standard.workflow.object.list
ds.integration.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
#After
#if hitting vector limit issue, open terminal, set cd to home directory, use "open .Renviron" and set the R_MAX_VSIZE=20Gb and save the file, you need to restart R for these changes to be saved
ds.integrated.data <- IntegrateData(anchorset = ds.integration.anchors, dims = 1:30)
save(ds.integrated.data, file = "/Users/paigehalas/Desktop/ds.integrated.data.Rda")
load("/Users/paigehalas/Desktop/ds.integrated.data.Rda")
#load necessary packages for visualization
library(ggplot2)
library(cowplot)
library(patchwork)
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
#DefaultAssay(integrated.data) <- "integrated"
ds.scale.integrated.data <- ScaleData(ds.integrated.data, verbose = TRUE)
ds.PCA.integrated.data<- RunPCA(ds.scale.integrated.data, npcs = 30, verbose = TRUE)
ds.UMAP.integrated.data <- RunUMAP(ds.PCA.integrated.data, reduction = "pca", dims = 1:30)
#saveRDS(UMAP.integrated.data, file = "/Users/paigehalas/Desktop/ds.UMAP.integrated.data.rds")
ds.UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/ds.UMAP.integrated.data.rds")
DimPlot(ds.UMAP.integrated.data, reduction = "umap", label = TRUE)
FeaturePlot(ds.UMAP.integrated.data, features = "nFeature_RNA")
DefaultAssay(ds.UMAP.integrated.data) <- "RNA"
ds.UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(ds.UMAP.integrated.data, pattern = "^MT-")
FeaturePlot(ds.UMAP.integrated.data, features = "percent.mt")
dsr.norm.brca.epithelial.individual.1.objects <- subset(norm.brca.epithelial.individual.1.objects, downsample = 124)
dsr.norm.brca.epithelial.individual.2.objects <- subset(norm.brca.epithelial.individual.2.objects, downsample = 124)
dsr.norm.brca.epithelial.individual.3.objects <- subset(norm.brca.epithelial.individual.3.objects, downsample = 124)
dsr.norm.brca.epithelial.individual.4.objects <- subset(norm.brca.epithelial.individual.4.objects, downsample = 124)
dsr.norm.brca.epithelial.individual.9.objects <- subset(norm.brca.epithelial.individual.9.objects, downsample = 124)
dsr.norm.brca.epithelial.individual.10.objects <- subset(norm.brca.epithelial.individual.10.objects, downsample = 124)
#To construct a reference, we will identify ‘anchors’ between the individual datasets. First, we combine each Seurat object into a list, with each dataset as an element.
#The list() function groups elements together in the form of list("X1"=Y1,"X2",Y2,...), where Xn is the name you want the list element to be called and Yn is the component you want added to the list. For this analysis, we want to keep track of which Seurat object belongs to which model/patient.
#first part
standard.workflow.object.list <-
#list("hci001"=p01.cc.updated,"hci002"=p02.updated,"hci010"=p10.updated)
list("hci001"=p01.counts,"hci002"=p02.counts,"hci010"=p10.counts,"n_patient1"=dsr.norm.brca.epithelial.individual.1.objects, "b_patient2"=dsr.norm.brca.epithelial.individual.2.objects, "b_patient3"=dsr.norm.brca.epithelial.individual.3.objects, "b_patient4" = dsr.norm.brca.epithelial.individual.4.objects, "n_patient9" = dsr.norm.brca.epithelial.individual.9.objects, "n_patient10"=dsr.norm.brca.epithelial.individual.10.objects )
#Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat v3 implements an improved method for variable feature selection based on a variance stabilizing transformation ("vst")
for (i in 1:length(standard.workflow.object.list)) {
standard.workflow.object.list[[i]] <- NormalizeData(standard.workflow.object.list[[i]], verbose = TRUE)
standard.workflow.object.list[[i]] <- FindVariableFeatures(standard.workflow.object.list[[i]], selection.method = "vst",
nfeatures = 2000, verbose = TRUE)
}
#Next we find anchors, which are pairwise correspondants between individual cells which originate from the same biological state. These anchors are then used to transfer infromation from one dataset to another
reference.list <- standard.workflow.object.list
dsr.integration.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
#After
#if hitting vector limit issue, open terminal, set cd to home directory, use "open .Renviron" and set the R_MAX_VSIZE=20Gb and save the file, you need to restart R for these changes to be saved
dsr.integrated.data <- IntegrateData(anchorset = integration.anchors, dims = 1:30)
save(dsr.integrated.data, file = "/Users/paigehalas/Desktop/dsr.integrated.data.Rda")
dsr.scale.integrated.data <- ScaleData(dsr.integrated.data, verbose = TRUE)
dsr.PCA.integrated.data<- RunPCA(dsr.scale.integrated.data, npcs = 30, verbose = TRUE)
dsr.UMAP.integrated.data <- RunUMAP(dsr.PCA.integrated.data, reduction = "pca", dims = 1:30)
saveRDS(dsr.UMAP.integrated.data, file = "/Users/paigehalas/Desktop/dsr.UMAP.integrated.data.rds")
dsr.UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/dsr.UMAP.integrated.data.rds")
DimPlot(dsr.UMAP.integrated.data, reduction = "umap", label = TRUE)
## Feature plot for nFeature_RNA to compare PDX cells and Norm/BRCA cells that are downsampled to 124
FeaturePlot(dsr.UMAP.integrated.data, features = "nFeature_RNA")
DefaultAssay(dsr.UMAP.integrated.data) <- "RNA"
dsr.UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(dsr.UMAP.integrated.data, pattern = "^MT-")
FeaturePlot(dsr.UMAP.integrated.data, features = "percent.mt")
#markers.RNA <- FindAllMarkers(UMAP.integrated.data, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, assay = "RNA")
#write.table(markers, file = "/Users/paigehalas/Desktop/norm.brca.pdx.std.integrated.res025.RNA.markers.RNA.txt")
#all.genes<-rownames(integrated.data@assays$RNA)
#integrated.data<-ScaleData(integrated.data,features = all.genes,assay = "RNA")
#save(integrated.data.g, file = "/Users/paigehalas/Desktop/integrated.data.all.genes.scale.rda")
#load("/Users/paigehalas/Desktop/integrated.data.all.genes.scale.rda")
#markers.RNA <- read.table("/Users/paigehalas/Desktop/norm.brca.pdx.std.integrated.res025.RNA.markers.RNA.txt")
#top10 <- markers.RNA %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
#top10 <- as.character(top10$gene)
#DoHeatmap(integrated.data, features = top10$gene, assay = "RNA") + NoLegend()
#Load TPM Seurat Objects to Transfer MetaData
load("seurat-objects/hci001.seurat3.object.Rda")
load("seurat-objects/hci002.seurat3.object.Rda")
load("seurat-objects/hci010.seurat3.object.Rda")
#Reorder HCI001 Meta Data
p01.cc.updated[["cell.names"]]<-rownames(p01.cc.updated@meta.data)
p01.cc.updated@meta.data<-arrange(p01.cc.updated@meta.data, cell.names)
rownames(p01.cc.updated@meta.data)<-as.character(p01.cc.updated$cell.names)
p01.counts[["cell.names"]]<-rownames(p01.counts@meta.data)
p01.counts@meta.data<-arrange(p01.counts@meta.data, cell.names)
rownames(p01.counts@meta.data)<-as.character(p01.counts$cell.names)
#Add HCI001 Meta Data to Counts Seurat Object
p01.counts[["mouse"]]<-p01.cc.updated$mouse
p01.counts[["burden"]]<-p01.cc.updated$burden
p01.counts[["tissue"]]<-p01.cc.updated$tissue
p01.counts[["paper.ident"]]<-p01.cc.updated$paper.ident
p01.counts[["pdx.surgery.type"]]<-p01.cc.updated$pdx.surgery.type
p01.counts[["pdx.surgical.side"]]<-p01.cc.updated$pdx.surgical.side
#Reorder HCI002 Meta Data
p02.updated[["cell.names"]]<-rownames(p02.updated@meta.data)
p02.updated@meta.data<-arrange(p02.updated@meta.data, cell.names)
rownames(p02.updated@meta.data)<-as.character(p02.updated$cell.names)
p02.counts[["cell.names"]]<-rownames(p02.counts@meta.data)
p02.counts@meta.data<-arrange(p02.counts@meta.data, cell.names)
rownames(p02.counts@meta.data)<-as.character(p02.counts$cell.names)
#Add HCI002 Meta Data to Counts Seurat Object
p02.counts[["mouse"]]<-p02.updated$mouse
p02.counts[["burden"]]<-p02.updated$burden
p02.counts[["tissue"]]<-p02.updated$tissue
p02.counts[["paper.ident"]]<-p02.updated$paper.ident
p02.counts[["pdx.surgery.type"]]<-p02.updated$pdx.surgery.type
p02.counts[["pdx.surgical.side"]]<-p02.updated$pdx.surgical.side
#Reorder HCI010 Meta Data
p10.updated[["cell.names"]]<-rownames(p10.updated@meta.data)
p10.updated@meta.data<-arrange(p10.updated@meta.data, cell.names)
rownames(p10.updated@meta.data)<-as.character(p10.updated$cell.names)
p10.counts[["cell.names"]]<-rownames(p10.counts@meta.data)
p10.counts@meta.data<-arrange(p10.counts@meta.data, cell.names)
rownames(p10.counts@meta.data)<-as.character(p10.counts$cell.names)
#Add HCI010 Meta Data to Counts Seurat Object
p10.counts[["mouse"]]<-p10.updated$mouse
p10.counts[["burden"]]<-p10.updated$burden
p10.counts[["tissue"]]<-p10.updated$tissue
p10.counts[["paper.ident"]]<-p10.updated$paper.ident
p10.counts[["pdx.surgery.type"]]<-p10.updated$pdx.surgery.type
p10.counts[["pdx.surgical.side"]]<-p10.updated$pdx.surgical.side
library(pheatmap)
library(RColorBrewer)
library(dendsort)
library(reshape2)
## Using tissue, paper.ident, predicted.id, cell.names as id variables
## Using tissue, paper.ident, predicted.id, cell.names as id variables
## Using tissue, paper.ident, predicted.id, cell.names as id variables